Import and preprocess data

Paired samples were obtained from each subject. For UC patients, these were inflamed / UC tissue ("inflamed") and adjacent normal tissue ("non-inflamed"). Similarly, for healthy patients two location-matched samples were obtained. Note they do not expect the non-inflamed samples to be similar to the healthy samples. From the paper: “We confirmed that expression of an inflammation-associated gene set increased from healthy to non-inflamed to inflamed samples”.

Question of interest:

  • Within epithelial / lamina propria locations separately: Healthy vs non-inflamed vs inflamed effect. Model for each cell type includes random effect for patient, and ‘Health’ fixed effect. Allows us to confirm and assess systematic changes when moving from healthy > non-inflamed > inflamed.
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
data <- read.table("~/Downloads/all.meta2.txt", sep="\t", skip=2)
colnames(data) <- c("NAME", "Cluster",  "nGene",    "nUMI", "Subject",  "Health",   "Location", "Sample")

table(data$Cluster)
## 
##       Best4+ Enterocytes    CD4+ Activated Fos-hi    CD4+ Activated Fos-lo 
##                     3286                    10406                     9421 
##              CD4+ Memory                CD4+ PD1+               CD69- Mast 
##                    26809                      417                      143 
##               CD69+ Mast                CD8+ IELs               CD8+ IL17+ 
##                     5654                     3576                      490 
##                  CD8+ LP                Cycling B        Cycling Monocytes 
##                    14935                     2211                      350 
##                Cycling T               Cycling TA                      DC1 
##                      557                    18204                      428 
##                      DC2              Endothelial   Enterocyte Progenitors 
##                     2391                     2516                     9713 
##              Enterocytes          Enteroendocrine               Follicular 
##                     4517                      677                    21468 
##                       GC                     Glia                   Goblet 
##                      916                     1262                     2113 
##                     ILCs   Immature Enterocytes 1   Immature Enterocytes 2 
##                      509                     7146                     9726 
##          Immature Goblet Inflammatory Fibroblasts   Inflammatory Monocytes 
##                     9554                     2268                     1652 
##                  M cells              Macrophages            Microvascular 
##                      441                    16692                      663 
##                    MT-hi           Myofibroblasts                      NKs 
##                      442                     1988                     2023 
##                Pericytes                   Plasma   Post-capillary Venules 
##                      774                    82651                     2367 
##                   RSPO3+             Secretory TA                     Stem 
##                      362                     4882                     2403 
##                     TA 1                     TA 2                    Tregs 
##                    33671                    15774                     6473 
##                     Tuft            WNT2B+ Fos-hi          WNT2B+ Fos-lo 1 
##                      899                     4309                     6350 
##          WNT2B+ Fos-lo 2                 WNT5B+ 1                 WNT5B+ 2 
##                     3158                     2355                     3500
table(data$Subject) # 30 patients
## 
##   N10  N106   N11  N110  N111   N12   N13   N14   N15   N16   N17   N18   N19 
## 16643  7542  6799 10404 25386  2364  4695  4952 10649  5417  5781  8059  7380 
##   N20   N21   N23   N24   N26   N44   N46   N49   N50   N51   N52  N539   N58 
##  9563  6952 13180 13365 11224 14437 10317  8784 10501 23011 33330  7794 25266 
##  N661    N7    N8    N9 
## 43187  4446  2224 11840
#table(data$Subject, data$Sample)
## For UC patients: inflamed (UC) and adjacent non-inflamed tissue.
table(data$Subject, data$Health)
##       
##        Healthy Inflamed Non-inflamed
##   N10    16643        0            0
##   N106       0     4848         2694
##   N11     6799        0            0
##   N110       0     3834         6570
##   N111       0    19648         5738
##   N12        0     1355         1009
##   N13     4695        0            0
##   N14        0     2276         2676
##   N15    10649        0            0
##   N16     5417        0            0
##   N17     5781        0            0
##   N18     8059        0            0
##   N19        0     2423         4957
##   N20     9563        0            0
##   N21     6952        0            0
##   N23        0     6341         6839
##   N24        0     5511         7854
##   N26        0     4360         6864
##   N44        0     6392         8045
##   N46    10317        0            0
##   N49        0     4015         4769
##   N50        0     4917         5584
##   N51    23011        0            0
##   N52        0    12901        20429
##   N539       0     3345         4449
##   N58        0    17599         7667
##   N661       0    18589        24598
##   N7         0     1833         2613
##   N8      2224        0            0
##   N9         0     4932         6908

Disease effect in epithelial cells

EDA

dataEpi <- data[data$Location == "Epi",]

dataEpi <- dataEpi %>%
  group_by(Sample, Cluster, Subject, Health) %>%
  summarize(nCells = n()) 
## `summarise()` has grouped output by 'Sample', 'Cluster', 'Subject'. You can
## override using the `.groups` argument.
library(tidyverse)
dataEpiWide <- pivot_wider(dataEpi,
                           id_cols = c("Sample", "Subject", "Health"),
                           names_from = "Cluster",
                           values_from = "nCells")
dataEpiWide[is.na(dataEpiWide)] <- 0

dataEpiCountMatrix <- as.matrix(t(dataEpiWide[,-c(1:3)]))
colnames(dataEpiCountMatrix) <- dataEpiWide$Sample
dataEpiMeta <- dataEpiWide[,1:3]

keep <- rowSums(dataEpiCountMatrix > 4) >= 6
dataEpiCountMatrix <- dataEpiCountMatrix[keep,]
dataEpi <- dataEpi[dataEpi$Cluster %in% rownames(dataEpiCountMatrix),]
dataEpi2 <- dataEpi %>%
  ungroup() %>%
  group_by(Sample) %>%
  mutate(nCellTypes = n(),
         geoMean = prod(nCells+1)^(1/nCellTypes),
         CLR = log((nCells+1) / geoMean ))
dataEpi2$Health <- factor(dataEpi2$Health, levels=c("Healthy", "Non-inflamed", "Inflamed"))

ggplot(dataEpi2, aes(x=CLR, y=Cluster, fill=Health)) +
  geom_boxplot(outlier.size = 0) +
  #geom_point(position = position_dodge(width = .75), size=1/5) +
  theme_classic()

DA analysis

voomCLR

library(limma)
## Warning: package 'limma' was built under R version 4.2.2
library(voomCLR)
design <- model.matrix(~ Health, data=dataEpiMeta)

v <- voomCLR(counts = dataEpiCountMatrix,
        design = design,
        block = factor(dataEpiMeta$Subject),
        plot = TRUE)

cf <- duplicateCorrelation(v, design, block=factor(dataEpiMeta$Subject))
v <- voomCLR(counts = dataEpiCountMatrix,
        design = design,
        block = factor(dataEpiMeta$Subject),
        plot = TRUE,
        correlation = cf$consensus.correlation)

cf <- duplicateCorrelation(v, design, block=factor(dataEpiMeta$Subject))
fit <- lmFit(v, 
             design,
             block = factor(dataEpiMeta$Subject),
             correlation = cf$consensus.correlation)
L <- matrix(0, nrow=ncol(fit$coefficients), ncol=3,
            dimnames = list(colnames(fit$coefficients), c("NIvsH", "IvsH", "IvsNI")))
L["HealthNon-inflamed",c("NIvsH")] <- 1
L["HealthInflamed",c("IvsH")] <- 1
L[c("HealthInflamed", "HealthNon-inflamed"),3] <- c(1, -1)
conFit <- contrasts.fit(fit, contrasts = L)
conFit <- eBayes(conFit)
plotBeta(conFit)

resList <- list()
for(cc in 1:3){
  ttbc <- topTableBC(conFit,
                     coef=cc,
                     number=Inf,
                     sort.by="none")#,
                     #bootstrap="nonparametric")
  resList[[cc]] <- ttbc
}

LinDA

## linda
library(MicrobiomeStat)
library(phyloseq)
## Warning: package 'phyloseq' was built under R version 4.2.2
lindaRes <- LinDA::linda(otu.tab = dataEpiCountMatrix, # rows features, cols samples
                                    meta = as.data.frame(dataEpiMeta),
                                    formula = '~ Health + (1|Subject)',
                                    type = 'count',
                                   adaptive=TRUE,
                                   imputation = FALSE)
## Imputation approach is used.
# inflamed vs healthy
lindaIvsH <- lindaRes$output$HealthInflamed
# non-inflamed vs healthy
lindaNIvsH <- lindaRes$output$`HealthNon-inflamed`
## inflamed vs non-inflamed: code from developers on GitHub
alp1 <- lindaRes$output$HealthInflamed$log2FoldChange
se1 <- lindaRes$output$HealthInflamed$lfcSE
df1 <- lindaRes$output$HealthInflamed$df

alp3 <- lindaRes$output$`HealthNon-inflamed`$log2FoldChange
se3 <- lindaRes$output$`HealthNon-inflamed`$lfcSE
df3 <- lindaRes$output$`HealthNon-inflamed`$df

cov13 <- lindaRes$covariance$HealthInflamed$`HealthNon-inflamed`

stat_linDA <- (alp1-alp3) / (sqrt(se1^2+se3^2-2*cov13))
l2fc_linDA <- alp1-alp3
names(stat_linDA) <- names(l2fc_linDA) <- rownames(lindaRes$output$SLE_statusSLE)
pvalue_linDA <- 2 * pt(-abs(stat_linDA), (df1+df3)/2)
padj_linDA <- p.adjust(pvalue_linDA, method = 'BH')
lindaIvsNI <- data.frame(log2FoldChange=l2fc_linDA,
                         stat=stat_linDA, 
                         pvalue=pvalue_linDA,
                         padj=padj_linDA,
                         row.names=rownames(lindaRes$output$HealthInflamed))

NB GLM

library(glmmTMB)
library(multcomp)
## Loading required package: mvtnorm
## Loading required package: survival
## Loading required package: TH.data
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
## 
## Attaching package: 'TH.data'
## The following object is masked from 'package:MASS':
## 
##     geyser
resDfNBIvsH <- resDfNBNIvsH <- resDfNBIvsNI <- data.frame(population=rownames(dataEpiCountMatrix),
                      diff=rep(NA,nrow(dataEpiCountMatrix)),
                      se=rep(NA,nrow(dataEpiCountMatrix)),
                      pval=rep(NA,nrow(dataEpiCountMatrix)))
for(pp in 1:nrow(dataEpiCountMatrix)){
  curY <- dataEpiCountMatrix[pp,]
  curDf <- cbind(dataEpiMeta, y=curY)
  m_p <- glmmTMB(y ~ Health + (1|Subject), 
                 data=curDf,
                 family=nbinom2(link="log"),
                 offset = log(colSums(dataEpiCountMatrix)))
  # inflamed vs healthy
  resDfNBIvsH[pp,2:4] <- c(summary(m_p)$coefficients$cond[2,1],
                       summary(m_p)$coefficients$cond[2,2],
                       summary(m_p)$coefficients$cond[2,4])
  # non-inflamed vs healthy
  resDfNBNIvsH[pp,2:4] <- c(summary(m_p)$coefficients$cond[3,1],
                       summary(m_p)$coefficients$cond[3,2],
                       summary(m_p)$coefficients$cond[3,4])
  
  ## inflamed vs non-inflamed: contrast
  L <- matrix(0, nrow=nrow(summary(m_p)$coef$cond), ncol=1)
  rownames(L) <- rownames(summary(m_p)$coef$cond)
  L[c("HealthInflamed", "HealthNon-inflamed"),1] <- c(1,-1)
  m_p_contrast <- glht(m_p, 
       linfct=t(L),
        coef. = function(x) fixef(x)[["cond"]],
        vcov. = function(x) vcov(x)[["cond"]],
       df=NULL)
  

    resDfNBIvsNI[pp,2:4] <- c(summary(m_p_contrast)$test$coefficients, 
                         summary(m_p_contrast)$test$sigma,
                         summary(m_p_contrast)$test$pvalues)
}


resDfNBIvsH$padj <- p.adjust(resDfNBIvsH$pval, "fdr")
resDfNBNIvsH$padj <- p.adjust(resDfNBNIvsH$pval, "fdr")
resDfNBIvsNI$padj <- p.adjust(resDfNBIvsNI$pval, "fdr")

Summary of results

voomCLR

nPopulations <- nrow(dataEpiCountMatrix)
resDfAll <- data.frame(population = unlist(lapply(resList, rownames)),
                       contrast = rep(c("NIvsH", "IvsH", "IvsNI"), each=nPopulations),
                       log2FC = unlist(lapply(resList, function(x) x$logFC)),
                       padj = unlist(lapply(resList, function(x) x$adj.P.Val))
                       
)
resDfAll$DA <- resDfAll$padj <= 0.05

pHeatVoomCLR <- ggplot(resDfAll, 
       aes(x=contrast, y=population, color=log2FC, size=DA)) +
  geom_point()+
  theme_light()+
  scale_color_gradient2(low= "blue", mid="gray", high= "red") +
  theme(strip.text.x = element_text(size=14, angle=90, hjust = 1, vjust = 1)) +
  xlab("Contrast") +
  ylab("Cell type")
pHeatVoomCLR
## Warning: Using size for a discrete variable is not advised.

LinDA

lindaIvsH$contrast <- "IvsH"
lindaIvsH$population <- rownames(lindaIvsH)
lindaIvsNI$contrast <- "IvsNI"
lindaIvsNI$population <- rownames(lindaIvsNI)
lindaNIvsH$contrast <- "NIvsH"
lindaNIvsH$population <- rownames(lindaNIvsH)
selectCols <- c("population","log2FoldChange", "stat", "pvalue", "padj", "contrast")
resDfAllLinDA <- rbind(lindaIvsH[,selectCols],
                       lindaIvsNI[,selectCols],
                       lindaNIvsH[,selectCols])
resDfAllLinDA$DA <- resDfAllLinDA$padj <= 0.05

pHeatLinDA <- ggplot(resDfAllLinDA, 
       aes(x=contrast, y=population, color=log2FoldChange, size=DA)) +
  geom_point()+
  theme_light()+
  scale_color_gradient2(low= "blue", mid="gray", high= "red") +
  theme(strip.text.x = element_text(size=14, angle=90, hjust = 1, vjust = 1)) +
  xlab("Contrast") +
  ylab("Cell type")
pHeatLinDA
## Warning: Using size for a discrete variable is not advised.

NB-GLM

resDfNBIvsH$contrast <- "IvsH"
resDfNBIvsNI$contrast <- "IvsNI"
resDfNBNIvsH$contrast <- "NIvsH"
resDfAllNB <- rbind(resDfNBIvsH,
                       resDfNBIvsNI,
                       resDfNBNIvsH)
resDfAllNB$DA <- resDfAllNB$padj <= 0.05

pHeatNB <- ggplot(resDfAllNB, 
       aes(x=contrast, y=population, color=diff, size=DA)) +
  geom_point()+
  theme_light()+
  scale_color_gradient2(low= "blue", mid="gray", high= "red") +
  theme(strip.text.x = element_text(size=14, angle=90, hjust = 1, vjust = 1)) +
  xlab("Contrast") +
  ylab("Cell type")
pHeatNB
## Warning: Using size for a discrete variable is not advised.

Lamina propria

EDA

dataLP <- data[data$Location == "LP",]

dataLP <- dataLP %>%
  group_by(Sample, Cluster, Subject, Health) %>%
  summarize(nCells = n()) 
## `summarise()` has grouped output by 'Sample', 'Cluster', 'Subject'. You can
## override using the `.groups` argument.
library(tidyverse)
dataLPWide <- pivot_wider(dataLP,
                           id_cols = c("Sample", "Subject", "Health"),
                           names_from = "Cluster",
                           values_from = "nCells")
dataLPWide[is.na(dataLPWide)] <- 0

dataLPCountMatrix <- as.matrix(t(dataLPWide[,-c(1:3)]))
colnames(dataLPCountMatrix) <- dataLPWide$Sample
dataLPMeta <- dataLPWide[,1:3]

keep <- rowSums(dataLPCountMatrix > 4) >= 6
dataLPCountMatrix <- dataLPCountMatrix[keep,]
dataLP <- dataLP[dataLP$Cluster %in% rownames(dataLPCountMatrix),]
dataLP2 <- dataLP %>%
  ungroup() %>%
  group_by(Sample) %>%
  mutate(nCellTypes = n(),
         geoMean = prod(nCells+1)^(1/nCellTypes),
         CLR = log((nCells+1) / geoMean ))
dataLP2$Health <- factor(dataLP2$Health, levels=c("Healthy", "Non-inflamed", "Inflamed"))

ggplot(dataLP2, aes(x=CLR, y=Cluster, fill=Health)) +
  geom_boxplot(outlier.size = 0) +
  #geom_point(position = position_dodge(width = .75), size=1/5) +
  theme_classic()

DA analysis

voomCLR

library(limma)
library(voomCLR)
design <- model.matrix(~ Health, data=dataLPMeta)

v <- voomCLR(counts = dataLPCountMatrix,
        design = design,
        block = factor(dataLPMeta$Subject),
        plot = TRUE)

cf <- duplicateCorrelation(v, design, block=factor(dataLPMeta$Subject))
v <- voomCLR(counts = dataLPCountMatrix,
        design = design,
        block = factor(dataLPMeta$Subject),
        plot = TRUE,
        correlation = cf$consensus.correlation)

cf <- duplicateCorrelation(v, design, block=factor(dataLPMeta$Subject))
fit <- lmFit(v, 
             design,
             block = factor(dataLPMeta$Subject),
             correlation = cf$consensus.correlation)
L <- matrix(0, nrow=ncol(fit$coefficients), ncol=3,
            dimnames = list(colnames(fit$coefficients), c("NIvsH", "IvsH", "IvsNI")))
L["HealthNon-inflamed",c("NIvsH")] <- 1
L["HealthInflamed",c("IvsH")] <- 1
L[c("HealthInflamed", "HealthNon-inflamed"),3] <- c(1, -1)
conFit <- contrasts.fit(fit, contrasts = L)
conFit <- eBayes(conFit)
plotBeta(conFit)

resList <- list()
for(cc in 1:3){
  ttbc <- topTableBC(conFit,
                     coef=cc,
                     number=Inf,
                     sort.by="none")#,
                     #bootstrap="nonparametric")
  resList[[cc]] <- ttbc
}

LinDA

## linda
library(MicrobiomeStat)
library(phyloseq)
lindaRes <- LinDA::linda(otu.tab = dataLPCountMatrix, # rows features, cols samples
                                    meta = as.data.frame(dataLPMeta),
                                    formula = '~ Health + (1|Subject)',
                                    type = 'count',
                                   adaptive=TRUE,
                                   imputation = FALSE)
## Imputation approach is used.
# inflamed vs healthy
lindaIvsH <- lindaRes$output$HealthInflamed
# non-inflamed vs healthy
lindaNIvsH <- lindaRes$output$`HealthNon-inflamed`
## inflamed vs non-inflamed: code from developers on GitHub
alp1 <- lindaRes$output$HealthInflamed$log2FoldChange
se1 <- lindaRes$output$HealthInflamed$lfcSE
df1 <- lindaRes$output$HealthInflamed$df

alp3 <- lindaRes$output$`HealthNon-inflamed`$log2FoldChange
se3 <- lindaRes$output$`HealthNon-inflamed`$lfcSE
df3 <- lindaRes$output$`HealthNon-inflamed`$df

cov13 <- lindaRes$covariance$HealthInflamed$`HealthNon-inflamed`

stat_linDA <- (alp1-alp3) / (sqrt(se1^2+se3^2-2*cov13))
l2fc_linDA <- alp1-alp3
names(stat_linDA) <- names(l2fc_linDA) <- rownames(lindaRes$output$SLE_statusSLE)
pvalue_linDA <- 2 * pt(-abs(stat_linDA), (df1+df3)/2)
padj_linDA <- p.adjust(pvalue_linDA, method = 'BH')
lindaIvsNI <- data.frame(log2FoldChange=l2fc_linDA,
                         stat=stat_linDA, 
                         pvalue=pvalue_linDA,
                         padj=padj_linDA,
                         row.names=rownames(lindaRes$output$HealthInflamed))

NB GLM

library(glmmTMB)
library(multcomp)
resDfNBIvsH <- resDfNBNIvsH <- resDfNBIvsNI <- data.frame(population=rownames(dataLPCountMatrix),
                      diff=rep(NA,nrow(dataLPCountMatrix)),
                      se=rep(NA,nrow(dataLPCountMatrix)),
                      pval=rep(NA,nrow(dataLPCountMatrix)))
for(pp in 1:nrow(dataLPCountMatrix)){
  curY <- dataLPCountMatrix[pp,]
  curDf <- cbind(dataLPMeta, y=curY)
  m_p <- glmmTMB(y ~ Health + (1|Subject), 
                 data=curDf,
                 family=nbinom2(link="log"),
                 offset = log(colSums(dataLPCountMatrix)))
  # inflamed vs healthy
  resDfNBIvsH[pp,2:4] <- c(summary(m_p)$coefficients$cond[2,1],
                       summary(m_p)$coefficients$cond[2,2],
                       summary(m_p)$coefficients$cond[2,4])
  # non-inflamed vs healthy
  resDfNBNIvsH[pp,2:4] <- c(summary(m_p)$coefficients$cond[3,1],
                       summary(m_p)$coefficients$cond[3,2],
                       summary(m_p)$coefficients$cond[3,4])
  
  ## inflamed vs non-inflamed: contrast
  L <- matrix(0, nrow=nrow(summary(m_p)$coef$cond), ncol=1)
  rownames(L) <- rownames(summary(m_p)$coef$cond)
  L[c("HealthInflamed", "HealthNon-inflamed"),1] <- c(1,-1)
  m_p_contrast <- glht(m_p, 
       linfct=t(L),
        coef. = function(x) fixef(x)[["cond"]],
        vcov. = function(x) vcov(x)[["cond"]],
       df=NULL)
  

    resDfNBIvsNI[pp,2:4] <- c(summary(m_p_contrast)$test$coefficients, 
                         summary(m_p_contrast)$test$sigma,
                         summary(m_p_contrast)$test$pvalues)
}


resDfNBIvsH$padj <- p.adjust(resDfNBIvsH$pval, "fdr")
resDfNBNIvsH$padj <- p.adjust(resDfNBNIvsH$pval, "fdr")
resDfNBIvsNI$padj <- p.adjust(resDfNBIvsNI$pval, "fdr")

Summary of results

voomCLR

nPopulations <- nrow(dataLPCountMatrix)
resDfAll <- data.frame(population = unlist(lapply(resList, rownames)),
                       contrast = rep(c("NIvsH", "IvsH", "IvsNI"), each=nPopulations),
                       log2FC = unlist(lapply(resList, function(x) x$logFC)),
                       padj = unlist(lapply(resList, function(x) x$adj.P.Val))
                       
)
resDfAll$DA <- resDfAll$padj <= 0.05

pHeatVoomCLR <- ggplot(resDfAll, 
       aes(x=contrast, y=population, color=log2FC, size=DA)) +
  geom_point()+
  theme_light()+
  scale_color_gradient2(low= "blue", mid="gray", high= "red") +
  theme(strip.text.x = element_text(size=14, angle=90, hjust = 1, vjust = 1)) +
  xlab("Contrast") +
  ylab("Cell type")
pHeatVoomCLR
## Warning: Using size for a discrete variable is not advised.

LinDA

lindaIvsH$contrast <- "IvsH"
lindaIvsH$population <- rownames(lindaIvsH)
lindaIvsNI$contrast <- "IvsNI"
lindaIvsNI$population <- rownames(lindaIvsNI)
lindaNIvsH$contrast <- "NIvsH"
lindaNIvsH$population <- rownames(lindaNIvsH)
selectCols <- c("population","log2FoldChange", "stat", "pvalue", "padj", "contrast")
resDfAllLinDA <- rbind(lindaIvsH[,selectCols],
                       lindaIvsNI[,selectCols],
                       lindaNIvsH[,selectCols])
resDfAllLinDA$DA <- resDfAllLinDA$padj <= 0.05

pHeatLinDA <- ggplot(resDfAllLinDA, 
       aes(x=contrast, y=population, color=log2FoldChange, size=DA)) +
  geom_point()+
  theme_light()+
  scale_color_gradient2(low= "blue", mid="gray", high= "red") +
  theme(strip.text.x = element_text(size=14, angle=90, hjust = 1, vjust = 1)) +
  xlab("Contrast") +
  ylab("Cell type")
pHeatLinDA
## Warning: Using size for a discrete variable is not advised.

NB-GLM

resDfNBIvsH$contrast <- "IvsH"
resDfNBIvsNI$contrast <- "IvsNI"
resDfNBNIvsH$contrast <- "NIvsH"
resDfAllNB <- rbind(resDfNBIvsH,
                       resDfNBIvsNI,
                       resDfNBNIvsH)
resDfAllNB$DA <- resDfAllNB$padj <= 0.05

pHeatNB <- ggplot(resDfAllNB, 
       aes(x=contrast, y=population, color=diff, size=DA)) +
  geom_point()+
  theme_light()+
  scale_color_gradient2(low= "blue", mid="gray", high= "red") +
  theme(strip.text.x = element_text(size=14, angle=90, hjust = 1, vjust = 1)) +
  xlab("Contrast") +
  ylab("Cell type")
pHeatNB
## Warning: Using size for a discrete variable is not advised.